* This file merges all the datasets used 
***********************************************
clear all
set mem 9000m
set matsize 2000
set maxvar 8000



**************************
* TVA DUMMIES
**************************
infile fips using ~/tva/data/tva_county_dummy/tvacounties.txt
summ
g tva = 1
sort fips
save tmp, replace

***************************
* READ STATE-LEVEL DATA
* AND TAKE 1930 INCOME VARIABLE
***************************
u ~/tva/data/valentina/state_variables/state_level_data
keep state pcp_income_30
sort state
save tmp7, replace


***************************
* WAGE DATA
* THESE ARE THE
* CORRECT MANUF AND TRADE (RETAIL+WHOLESALE) WAGES.
******************************
u ~/tva/data/valentina/county_variables/new/tva1
keep fips mwage* pcmwage* pctwage* var88_county72 var89_county72
duplicates report fips
duplicates drop fips, force
sort fips
save tmp76, replace

* mwage is the total county level manufacturing wages in thousands of dollars.
* For 1900, 1920, 1930, it is from the Census of Population
* and it is given in dollars, so divided by 1,000. For 1940 defined as 1939 wages, from the Census of Population. For 1950 defined as
* 1954 wages from Economic Census. For 1960 defined as 1963 wages from Economic Census. 
* For 1970 is defined as 1972 wages from Economic Census, given in millions
* of dollars, so multiplied by 1,000. For 1980 defined as 1982 wages from Economic Census, given in millions of dollars, so
* multiplied by 1,000. For 1990 defined as 1987 wages from Economic Census, given in millions of dollars, so multiplied
* by 1,000. For 2000, defined as 1997 wages from Economic Census.

* pcmwage is equal to mwage divided by the relevant number of workers in thousand
* For 1900, 1920, 1930, and 1940 
* mage is divided by number of manufacturing workers in the county,  from the Census of Population. 
* For 1950 mwage is divided by 
* the 1954 number of manufacturing workers, from the Economic Census. For 1960 mwage is divided by the 1963 number of manufacturing workers 
* from Economic Census. 
* For 1970 mwage is divided by the 1972 number of manufacturing workers, from Economic Census.
* For 1980 mwage is divided by 1982 number of manufacturing workers, from Economic Census.
* For 1990 mwage is divided by the 1987 number of manufacturing workers, from Economic Census
* For 2000, mwage is divided by the 1997 number of manufacturing workers, from Economic Census.


***************************
* COUNTY-LEVEL DATA
***************************
u ~/tva/data/valentina/county_variables/tva_update //this file is built by tva_update.do (certified by Valentina 7/14/2011)
duplicates report fips
drop mwage* twage*
drop _merge
sort fips
merge 1:1 fips using tmp76
tab _merge
keep if _merge ==3
drop _merge

merge 1:1 fips using tmp
tab _merge
drop if _merge ==2
replace tva = 0 if tva ==.


****************************
*  Merge on Fishback Data  *
****************************
drop _merge
drop if county==.|state==.

merge 1:1 county state using /accounts/projects/tva/tva/data/topography/fishback
tab _merge
drop if _merge==2
drop N10


****************************
*  Merge on Ag Land Values *
****************************
drop _merge
merge 1:1 fips using /accounts/projects/tva/tva/data/agricultural_land/data
tab _merge
drop if _merge==2
duplicates report fips


****************************
*  Merge on TVA 'Donut'    *
****************************
drop _merge
*merge 1:1 fips using /accounts/projects/tva/tva/data/border_counties/donut
joinby fips using /accounts/projects/tva/tva/data/border_counties/donut, unmatched(both)
tab _merge
drop if _merge==2


****************************
*  Merge on Correct Employment     
* Variables
****************************
drop _merge
merge 1:1 fips using ~/tva/data/valentina/county_variables/enrico/enrico_jobs

tab _merge
drop if _merge==2
duplicates report fips


*******************************
*  Merge on Housing Val/Rents *
*******************************
drop _merge
merge 1:1 fips using ~/tva/data/valentina/county_variables/RawData/housingvals
tab _merge
drop if _merge==2
duplicates report fips

drop _merge
merge 1:1 fips using ~/tva/data/valentina/county_variables/RawData/county62_1, keepusing(var61_county62 var63_county62)

ren var61_county62 medhsval60
ren var63_county62 medrnt60
tab _merge
drop if _merge==2
duplicates report fips



*******************************
* Merge on weather variables  *
*******************************
drop _merge

merge 1:1 fips using /accounts/projects/tva/tva/data/weather/JUL_MEAN_IDW200_365_1968_2002.dta
foreach var of varlist tmin tmean tmax{
        ren `var' `var'_jul
	}
	drop if _merge==2
	drop _merge

	merge 1:1 fips using /accounts/projects/tva/tva/data/weather/JAN_MEAN_IDW200_365_1968_2002.dta
	foreach var of varlist tmin tmean tmax{
	        ren `var' `var'_jan
		}
		drop if _merge==2


****************************
* Merge extra manuf vars   *
****************************
drop _merge
merge 1:1 fips using ~/tva/data/valentina/county_variables/enrico/manuf2/data2.dta
tab _merge
drop if _merge==2


drop _merge
merge 1:1 fips using ~/tva/data/valentina/county_variables/enrico/manuf3/tva.dta
tab _merge
drop if _merge==2

drop _merge
merge 1:1 fips using ~/tva/data/valentina/county_variables/new/new/tva3.dta
tab _merge
drop if _merge==2
summ *merge*
drop _merge*


****************************
* PROPOSED AUTHORITIES
******************************
sort fips
merge fips using ~/tva/data/authorities/data5/data
tab _merge
drop if _merge==2
drop _merge
tab aut_euclidean1
tab aut_euclidean2
tab aut_euclidean3
tab aut_cent
tab aut_euclidean5
tab aut_euclidean6
g       aut4 =0
replace aut4 =1  if max(aut_euclidean1,aut_euclidean2,aut_euclidean3,aut_cent,aut_euclidean5,aut_euclidean6) ==1
summ fips tva aut_*
tab tva aut4
drop aut_eucl* aut_cent*



****************************
*  Cleanup and Definition  *
****************************

* State and region
drop state
g state = int(fips/1000)
drop if state ==51 | state ==52 | state ==2 | state ==3 | state ==15
g northeast =0
g midwest=0
g south=0
g west=0
		replace northeast =1 if state == 9 | state == 23 | state == 25 | state == 33 | state == 44 | state  == 50 | state == 34 | state == 36 | state == 42
		replace midwest=1    if state == 17 | state == 18 | state == 26 | state == 39 | state == 55 | state ==19 | state == 20 | state == 27 | state == 29 | state == 31 | state == 38 | state == 46
		replace south=1      if state ==10 | state == 11 | state == 12 | state == 13 | state == 24 | state  == 37 | state == 45 | state == 51   | state == 54 | state == 1 | state == 21 | state == 28  | state == 47 | state == 5 | state == 22 | state == 40 | state == 48
		replace west=1       if state == 4 | state == 8 | state == 16 | state == 30 | state == 32 | state == 35 | state == 49 | state == 56 | state == 2 | state == 6 | state == 15 | state == 41 | state ==53
		g       region =1 if northeast ==1
		replace region =2 if midwest ==1
		replace region =3 if south ==1
		replace region =4 if west ==1


* CPI
g cpi890= 5 
g cpi0  = 7
g cpi10 = 9
g cpi20 = 20
g cpi30 = 16.7
g cpi40 = 14
g cpi50 = 24.1
g cpi60 = 29.6
g cpi70 = 38.8
g cpi80 = 82.4
g cpi90 = 130.7
g cpi2000 = 172.2


* Merge the state-level variable
sort state
merge state using tmp7
tab _merge
drop if tva ==.
rm tmp.dta
rm tmp7.dta
rm tmp76.dta


***********************************************
*Drop Donut and counties with missing lat/long*
***********************************************
drop if border_county==1|latitude==.|longitud==.


*******************************************************
* Drop counties with very low populations in any year *
*******************************************************
drop if pop0<1000|pop10<1000|pop20<1000|pop30<1000|pop40<1000|pop50<1000|pop60<1000|pop70<1000|pop80<1000|pop90<1000|pop2000<1000



save build, replace





